{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### Align text and audio using Montreal Forced Aligner (MFA)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "IPkicKwU8IWj"
   },
   "outputs": [],
   "source": [
    "%%capture\n",
    "# Refresh the apt package index and upgrade pip; all output is silenced by %%capture.\n",
    "# apt-get is the script-stable apt front end, and %pip installs into the kernel's environment.\n",
    "!apt-get update -y\n",
    "%pip install -U pip"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "G6Z-aDd08hfk"
   },
   "outputs": [],
   "source": [
    "%%capture\n",
    "%%bash\n",
    "# Download the infore_16k_denoised archive and unpack it into ./infore_16k_denoised.\n",
    "# Abort on the first failing command so a failed download is never unzipped.\n",
    "set -euo pipefail\n",
    "\n",
    "data_root=\"./infore_16k_denoised\"\n",
    "mkdir -p \"$data_root\"\n",
    "cd \"$data_root\"\n",
    "wget https://huggingface.co/datasets/ntt123/infore/resolve/main/infore_16k_denoised.zip -O infore.zip\n",
    "# -o: overwrite existing files without prompting, so the cell can be re-run safely.\n",
    "unzip -o infore.zip"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "VWwgAePDXy4m"
   },
   "outputs": [],
   "source": [
    "from pathlib import Path\n",
    "\n",
    "# Collect every lower-cased, whitespace-separated word from the transcripts that\n",
    "# have a matching .wav file, and write them one per line to /content/words.txt.\n",
    "txt_files = sorted(Path(\"./infore_16k_denoised\").glob(\"*.txt\"))\n",
    "with open(\"/content/words.txt\", \"w\", encoding=\"utf-8\") as f:\n",
    "    for txt_file in txt_files:\n",
    "        wav_file = txt_file.with_suffix(\".wav\")\n",
    "        if not wav_file.exists():\n",
    "            continue  # transcript has no audio counterpart\n",
    "        # read_text opens and closes the file, so no handle is leaked per transcript\n",
    "        line = txt_file.read_text(encoding=\"utf-8\")\n",
    "        for word in line.strip().lower().split():\n",
    "            f.write(word + \"\\n\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "FktjNXbDkBLh"
   },
   "outputs": [],
   "source": [
    "black_list = (\n",
    "    []\n",
    "    + [\"q\", \"adn\", \"h\", \"stress\", \"b\", \"k\", \"mark\", \"gas\", \"cs\", \"test\", \"l\", \"hiv\"]\n",
    "    + [\"v\", \"d\", \"c\", \"p\", \"martin\", \"visa\", \"euro\", \"laser\", \"x\", \"real\", \"shop\"]\n",
    "    + [\"studio\", \"kelvin\", \"đt\", \"pop\", \"rock\", \"gara\", \"karaoke\", \"đicr\", \"đigiúp\"]\n",
    "    + [\"khmer\", \"ii\", \"s\", \"tr\", \"xhcn\", \"casino\", \"guitar\", \"sex\", \"oxi\", \"radio\"]\n",
    "    + [\"qúy\", \"asean\", \"hlv\" \"ts\", \"video\", \"virus\", \"usd\", \"robot\", \"ph\", \"album\"]\n",
    "    + [\"s\", \"kg\", \"km\", \"g\", \"tr\", \"đ\", \"ak\", \"d\", \"m\", \"n\"]\n",
    ")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "b3nMwfzK_g0B"
   },
   "outputs": [],
   "source": [
    "# Build the lexicon: one line per unique word that is not in black_list, holding\n",
    "# the word, a tab, and the word's characters separated by spaces.\n",
    "# words.txt is written as UTF-8, so read it (and write the lexicon) as UTF-8 too\n",
    "# instead of relying on the platform's default encoding.\n",
    "with open(\"/content/words.txt\", encoding=\"utf-8\") as f:\n",
    "    ws = f.readlines()\n",
    "\n",
    "with open(\"/content/lexicon.txt\", \"w\", encoding=\"utf-8\") as f:\n",
    "    for w in sorted({line.strip() for line in ws}):\n",
    "        # skip blank lines and black-listed words\n",
    "        if not w or w in black_list:\n",
    "            continue\n",
    "\n",
    "        # this is a hack to match phoneme set in the vietTTS repo\n",
    "        p = \" \".join(w)\n",
    "        f.write(f\"{w}\\t{p}\\n\")"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "WuWZKTNRt1eM"
   },
   "outputs": [],
   "source": [
    "%%writefile install_mfa.sh\n",
    "#!/bin/bash\n",
    "\n",
    "## a script to install Montreal Forced Aligner (MFA)\n",
    "## usage: bash install_mfa.sh [root_dir]   (root_dir defaults to /tmp/mfa)\n",
    "\n",
    "# stop at the first failing step instead of continuing with a broken install\n",
    "set -euo pipefail\n",
    "\n",
    "root_dir=\"${1:-/tmp/mfa}\"\n",
    "mkdir -p \"$root_dir\"\n",
    "cd \"$root_dir\"\n",
    "# resolve to an absolute path so a relative root_dir still works after the cd\n",
    "root_dir=\"$(pwd)\"\n",
    "\n",
    "# download miniconda3 (-O overwrites the installer on re-runs instead of saving .1, .2, ... copies)\n",
    "installer=\"Miniconda3-latest-Linux-x86_64.sh\"\n",
    "wget -q --show-progress \"https://repo.anaconda.com/miniconda/$installer\" -O \"$installer\"\n",
    "bash \"$installer\" -b -p \"$root_dir/miniconda3\" -f\n",
    "\n",
    "#install MFA\n",
    "\"$root_dir/miniconda3/bin/conda\" create -n aligner -c conda-forge montreal-forced-aligner=2.0.0rc7 -y\n",
    "\n",
    "echo -e \"\\n======== DONE ==========\"\n",
    "echo -e \"\\nTo activate MFA, run: source $root_dir/miniconda3/bin/activate aligner\""
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "osR7KJCNXJYq"
   },
   "outputs": [],
   "source": [
    "# download and install mfa by running the install_mfa.sh script written by the %%writefile cell\n",
    "INSTALL_DIR = \"/tmp/mfa\"  # path to install directory; reused later to locate miniconda3/bin/activate\n",
    "!bash ./install_mfa.sh {INSTALL_DIR}"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "colab": {
     "base_uri": "https://localhost:8080/"
    },
    "id": "hxbXwJZlXLPz",
    "outputId": "d3e40ec5-68a7-40ec-d070-137736d7a956"
   },
   "outputs": [],
   "source": [
    "# Activate the `aligner` conda environment, then run `mfa train` on the corpus.\n",
    "# `&&` keeps mfa from running when activation fails, and the lexicon is referenced\n",
    "# by the same absolute path it was written to (/content/lexicon.txt).\n",
    "!source {INSTALL_DIR}/miniconda3/bin/activate aligner && \\\n",
    "mfa train --clean -t ./temp -o ./infore_mfa.zip ./infore_16k_denoised /content/lexicon.txt ./infore_textgrid"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {
    "id": "8Z65_BtXagn1"
   },
   "outputs": [],
   "source": [
    "# copy to train directory\n",
    "!mkdir -p train_data\n",
    "!cp ./infore_16k_denoised/*.wav ./train_data\n",
    "!cp ./infore_textgrid/*.TextGrid ./train_data"
   ]
  }
 ],
 "metadata": {
  "colab": {
   "collapsed_sections": [],
   "name": "align-text-audio | InfoRe using MFA v2rc7.ipynb",
   "provenance": []
  },
  "kernelspec": {
   "display_name": "Python 3",
   "name": "python3"
  },
  "language_info": {
   "name": "python"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 0
}
